# -*-encoding:utf-8-*-
#coding=utf-8

import sys
import locale
import BaseHTTPServer
import urlparse
from urllib import unquote
import time
from SocketServer import ThreadingMixIn
import threading
import logging
import logging.config
import requests
from string import Template
import json
from WeiboWenZhang import crawler_weibo_wenzhang
from ctypes import cdll  
from requests.packages.urllib3.exceptions import InsecureRequestWarning

sys.path.insert(0, "/usr/lib")
import libgolaxynpce
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
reload(sys)
sys.setdefaultencoding('utf-8')

logging.config.fileConfig("./logger.conf")
logger = logging.getLogger("example")

# HTML template for the demo page, filled in via str.format with:
#   u   - the requested/extracted URL (also echoed into the input box)
#   t   - extracted title, or an error message on failure
#   src - extracted source site name
#   pt  - extracted publish time
#   c   - extracted body text (rendered inside the <textarea>)
page_html_tpl ='''<html>
<title>NPCE正文抽取组件演示DEMO </title>
<body>
<center>
<h2> NPCE正文抽取-V1.0演示 </h2>
<hr>
    <form action="/" method="get">
       <table>
	 <tr><td>URL:</td><td> <input type="text" name="url" size="80" value="{u}" >&nbsp;&nbsp;<input type="submit" value="抽取" /></td></tr>
         <tr><td>标题:</td><td> <input type="text" size="80" value="{t}"></td></tr>
         <tr><td>来源:</td><td> <input type="text" size="30"  value="{src}"></td></tr>
         <tr><td>时间:</td><td> <input type="text" size="30" value="{pt}"></td></tr>
         <tr><td>正文:</td><td> <textarea rows="16" cols="100">{c}</textarea></td></tr>
	</table>
    </form>
<hr>
<strong>contact:inrgihc@126.com</strong>
</center>
</body>
</html>'''

class WebRequestHandler(BaseHTTPServer.BaseHTTPRequestHandler):
	"""HTTP handler for the NPCE content-extraction demo service.

	GET  /      -- HTML demo page; an optional ?url=... query parameter
	               triggers a download of that page followed by extraction,
	               with the results rendered back into the form.
	POST /npce  -- JSON API; form-encoded body with 'url' and optional
	               '1'-valued flags 'img' and 'flg' forwarded to the extractor.
	"""

	def log_message(self, format, *args):
		# Route BaseHTTPServer's default stderr access log through our logger.
		logger.info("Client:[%s] - Request URI: [%s]\n" % (self.client_address[0], format % args))

	def do_GET(self):
		"""Serve the demo page; when a 'url' parameter is present, fetch the
		page, run the extractor, and fill the form with the result."""
		(scheme, netloc, path, params, query, fragment) = urlparse.urlparse(self.path)
		params = dict([(k, v[0]) for k, v in urlparse.parse_qs(query).items()])
		logger.info('Get data params : %s' % str(params))

		cur_thread = threading.currentThread()
		logger.info('Handle Thread:%s' % cur_thread.getName())

		if 'url' in params:
			success = False
			url = unquote(params['url'])
			html = None

			# First, load the page for the requested url.
			if url.startswith('https://weibo.com/ttarticle/p/show?'):
				html = crawler_weibo_wenzhang(url)
				if html is not None:
					success = True
				else:
					page_html = page_html_tpl.format(u=url, t='Error:下载页面失败!!!', src='', pt='', c='')
			elif url.startswith('https://weibo.com'):
				page_html = page_html_tpl.format(u=url, t='Error:不支持的微博抽取地址!!!', src='', pt='', c='')
			else:
				resp = self.Download(url=url, timeout=120, allow_redirects=False)
				if resp is None:
					page_html = page_html_tpl.format(u=url, t='Error:下载页面异常!!!', src='', pt='', c='')
				elif 200 != resp.status_code:
					page_html = page_html_tpl.format(u=url,
							t='Error:下载页面时的HTTP状态码为%d!!!' % resp.status_code,
							src='', pt='', c='')
				else:
					success = True
					# Log instead of the original bare print -- keeps all
					# diagnostics in one stream.
					logger.info("http status is %d" % resp.status_code)
					html = resp.content

			# Second, extract the page content from the html.
			if success and html is not None:
				content = libgolaxynpce.extract(url, html, 0, 0)
				data = json.loads(content)
				if 0 == data['status']:
					page_html = page_html_tpl.format(u=data['doc']['url'],
						t=data['doc']['title'],
						src=data['doc']['source'],
						pt=data['doc']['pubtime'],
						c=data['doc']['content'])
				else:
					page_html = page_html_tpl.format(u=url, t='Error:抽取页面失败!!!', src='', pt='', c='')
		else:
			page_html = page_html_tpl.format(u='', t='', src='', pt='', c='')

		# BUGFIX: Content-Length must be the length of the ENCODED body, not
		# the unicode character count -- the page contains multi-byte UTF-8
		# text, so the original len(page_html) under-reported and truncated
		# the response in strict clients.
		body = page_html.encode('utf-8') if isinstance(page_html, unicode) else page_html
		self.send_response(200)
		self.send_header("Content-type", "text/html; charset=utf-8")
		self.send_header("Content-Length", str(len(body)))
		self.end_headers()
		self.wfile.write(body)

	def do_POST(self):
		"""Dispatch POST requests: /npce runs the extraction API; any other
		path gets a JSON error response."""
		(scheme, netloc, path, params, query, fragment) = urlparse.urlparse(self.path)
		body_length = self.headers.getheader('content-length')
		body_nbytes = int(body_length)
		post_data = self.rfile.read(body_nbytes)

		cur_thread = threading.currentThread()
		logger.info('Handle Thread:%s' % cur_thread.getName())

		params = dict([(k, v[0]) for k, v in urlparse.parse_qs(post_data).items()])
		logger.info('Post data params : %s' % str(params))

		if path == '/npce':
			return self.Handle_npce(params)

		# BUGFIX: fixed "Unkown" typo in the API error message.
		message_body = '{"status":-1,"errmsg":"Unknown URI"}'
		return self.Response(0, message_body)

	def Handle_npce(self, params):
		"""Run the extraction pipeline for params['url'] and respond with the
		extractor's JSON (or a JSON error). 'img'/'flg' are optional '1' flags
		passed through to libgolaxynpce.extract."""
		if 'url' not in params:
			return self.Response(0, '{"status":-1,"errmsg":"Param url not exist"}')

		url = unquote(params['url'])
		img = 1 if params.get('img') == '1' else 0
		flg = 1 if params.get('flg') == '1' else 0

		html = None
		# BUGFIX: pre-bind a default error so message_body is always defined.
		# The original raised UnboundLocalError when the weibo crawler
		# returned None (success=True but html=None left message_body unset).
		message_body = '{"status":-1,"errmsg":"Exception happened when load page"}'

		# First, load the page for the requested url.
		if url.startswith('https://weibo.com/ttarticle/p/show?'):
			html = crawler_weibo_wenzhang(url)
		elif url.startswith('https://weibo.com'):
			message_body = '{"status":-1,"errmsg":"Unsupported for weibo extract"}'
		else:
			resp = self.Download(url, timeout=120, allow_redirects=False)
			if resp is None:
				message_body = '{"status":-1,"errmsg":"Exception happened when load page"}'
			elif 200 != resp.status_code:
				message_body = '{"status":-1,"errmsg":"Load page response http code is %d"}' % resp.status_code
			else:
				html = resp.content

		# Second, extract the page content from the html.
		if html is not None:
			result = libgolaxynpce.extract(url, html, img, flg)
			return self.Response(0, result)

		return self.Response(0, message_body)

	def Response(self, errno, message_body):
		"""Send an HTTP 200 with a JSON body.

		errno is kept for interface compatibility but is not used.
		"""
		# BUGFIX: use the encoded byte length for Content-Length (see do_GET).
		body = message_body.encode('utf-8') if isinstance(message_body, unicode) else message_body
		self.send_response(200)
		self.send_header("Content-type", 'application/json;charset=utf-8')
		self.send_header("Content-Length", str(len(body)))
		self.send_header('Server', 'BaseHttpNpceServer1.0')
		self.end_headers()
		self.wfile.write(body)

	def Download(self, url, payload=None, timeout=90, allow_redirects=False):
		"""GET the url with a browser User-Agent and return the requests
		response, or None on any failure.

		NOTE(review): TLS verification is deliberately disabled (verify=False)
		to match the demo's original behavior -- do not reuse in production.
		BUGFIX: payload defaults to None instead of a shared mutable {}.
		"""
		try:
			response = requests.get(
				url,
				headers={'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6'},
				params=payload if payload is not None else {},
				allow_redirects=allow_redirects,
				timeout=timeout,
				verify=False)
			return response
		except requests.exceptions.ConnectionError:
			logging.warning('http_get timeout, url: %s' % url)
			return None
		except Exception:
			logging.exception('error in httpget: %s' % url)
			return None

class ThreadingHttpServer(ThreadingMixIn, BaseHTTPServer.HTTPServer):
    """HTTPServer that services each incoming request in its own thread."""
    pass


if __name__ == '__main__':
	'program main entance function!'
	port_num = 7645
	if len(sys.argv) <2 :
		print 'Usage :%s [port_num]'%sys.argv[0]
	elif len(sys.argv) ==2 :
		if locale.atoi(sys.argv[1])<1024 :
			print 'Invalid server port number(>1024):%s'% sys.argv[1]
		else:
			port_num=locale.atoi(sys.argv[1])
			# server = BaseHTTPServer.HTTPServer(('0.0.0.0',18460), WebRequestHandler)
			server = ThreadingHttpServer(('0.0.0.0', port_num), WebRequestHandler)
			ip, port = server.server_address
			# Start a thread with the server -- that thread will then start one
			# more thread for each request
			server_thread = threading.Thread(target=server.serve_forever)
			# Exit the server thread when the main thread terminates
			server_thread.setDaemon(True)
			server_thread.start()
			logger.info('Server is running on %s:%s'%(ip,port))

			logger.info('Server loop running in thread:'+ server_thread.getName())
			
			try:
			
				while True:
					pass
			except:
				pass

	else:
		print 'Usage :%s [port_num]'%sys.argv[0]

	logger.info('Server is shutdown ')


